#=============================================================================#
#
# PROJECT:        Who Pays for Peace?
# AUTHORS:        ** anonymized for review **
# CONTACT:        ** anonymized for review **
# LAST MODIFIED:  July 5, 2022
# 
#=============================================================================#
#
#  General Data Cleaning File (T2):
#  This R file contains the code used to clean the data so readers can follow our steps.
#  However, due to privacy reasons, the raw data-set is not provided online.
# 
#=============================================================================#

# Initial settings ------------------------------------------------------------
# Set the working directory by clicking the ".Rproj" file
# Restart an R session before running this script

#rm(list=ls())
#getwd()

## Install and load all necessary packages ------------------------------------
# ipak function: install and load multiple R packages.
# check to see if packages are installed. Install them if they are not, then load them into the R session.

# ipak: install any packages in `pkg` that are not yet installed, then load
# every package in `pkg` into the session.
#
# Args:
#   pkg: character vector of package names.
#
# Returns:
#   A logical vector with one element per entry of `pkg` (named by package),
#   TRUE where the package was loaded successfully. A warning lists the
#   packages that could not be loaded.
ipak <- function(pkg) {
  new_pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new_pkg) > 0) {
    install.packages(new_pkg, dependencies = TRUE)
  }

  # vapply() always yields a logical vector (sapply() returns a list for
  # empty input). require() is kept on purpose: its TRUE/FALSE status is the
  # return value of this helper.
  loaded <- vapply(pkg, require, logical(1), character.only = TRUE)

  if (!all(loaded)) {
    warning(
      "Could not load package(s): ",
      paste(pkg[!loaded], collapse = ", "),
      call. = FALSE
    )
  }
  loaded
}

# Packages used by this script; ipak() installs any that are missing and then
# loads all of them.
packages <- c("haven", "tidyverse", "ggplot2", "fastDummies") 
ipak(packages)

## Load and inspect the data --------------------------------------------------
# Read the raw T2 data (path is relative to the working directory, see the
# initial settings above) and print a per-column summary.
rawdata_t2 <- readRDS("data/T2-raw-data.rds")
summary(rawdata_t2)


# Data cleaning  --------------------------------------------------------------

## Drop unnecessary variables -------------------------------------------------
# Names of the columns that are removed from the raw data
drop <- c(
  "StartDate", "EndDate", "Status", "IPAddress", "Progress", "Finished",
  "RecordedDate", "ExternalReference", "LocationLatitude",
  "LocationLongitude", "DistributionChannel", "Timing_First Click",
  "Timing_Last Click", "Timing_Page Submit", "Timing_Click Count",
  "status_NK2", "additional", "surveyName"
)
# Keep every column whose name is not in the drop list
rawdata_t2 <- rawdata_t2[, !is.element(names(rawdata_t2), drop)]
summary(rawdata_t2)
str(rawdata_t2)

## Dummy variable for respondents who took the survey in AZ (Robustness) ------
# Trim whitespace from the language code, then shorten the labels
rawdata_t2$UserLanguage <- recode_factor(
  as.factor(trimws(rawdata_t2$UserLanguage)),
  "AZ-AZ" = "AZ",
  "EN" = "EN"
)
# Dummy as a factor: 1 = survey language is AZ, 0 = any other language
rawdata_t2$AZLanguage <- as.factor(
  as.numeric(rawdata_t2$UserLanguage == "AZ")
)
summary(rawdata_t2$AZLanguage)

## Drop respondents without proper consent ------------------------------------
# Keep only rows where all three consent items equal 1; rows with a missing
# consent item are dropped as well, because subset() discards NA conditions.
rawdata_t2 <- subset(rawdata_t2, ifc_1 == 1 & ifc_2 == 1 & ifc_3 == 1)

## Drop people who rushed through the survey ----------------------------------
# as indicated in the pre-registration
rawdata_t2$duration <- rawdata_t2$`Duration (in seconds)` / 60 #duration (in minutes)
summary(rawdata_t2$duration) #again, median time = 15 minutes
qplot(rawdata_t2$duration, geom = "histogram", bins = 100)
qplot(rawdata_t2$duration, geom = "boxplot") #several outliers who took multiple days.
#might have finished the survey after the reminder, hence we do not delete those.
quantile(rawdata_t2$duration, 0.025) #detect lower bound to delete those rushers.
#2.575 makes sense, rounded down to 2 minutes (more consistent with T1)
rawdata_t2$rush <- rawdata_t2$duration <= 2 #TRUE = finished in 2 minutes or less
summary(rawdata_t2$rush)
#we can do a robustness check with and without those outliers.
#for now, delete everyone who took 2 minutes or less to complete the survey
#(rows with a missing duration are dropped too: subset() discards NA conditions)
rawdata_t2 <- subset(rawdata_t2, !rawdata_t2$rush) #delete rushers

## Clean and recode pre-treatment variables -----------------------------------
# ID to merge T1 and T2
rawdata_t2 <- rename(rawdata_t2, ID = RecipientFirstName) #rename

# Gender (values 88 and 99 set as missing data, and stored as a factor)
summary(as.factor(rawdata_t2$gender))
# Only the two documented missing-data codes are blanked out; the previous
# range 88:99 also matched every code in between.
rawdata_t2$female <- as.factor(
  ifelse(rawdata_t2$gender %in% c(88, 99), NA, rawdata_t2$gender)
)
# NOTE(review): the remaining gender codes are copied unchanged, so
# "female (= 1)" holds only if the questionnaire coded it that way - confirm.
summary(as.factor(rawdata_t2$female))

## Create experimental condition and outcome variables ------------------------

# Condition and outcome variables for compensations
# Lookup: arm-specific item -> label of the experimental arm. The order is
# meaningful: later items overwrite earlier ones when labelling, and
# coalesce() keeps the first non-missing answer.
comp_arms <- c(
  compensation_control = "Control",
  compensation_Azeri   = "Azerbaijan",
  compensation_Armenia = "Armenia",
  compensation_both    = "Both",
  compensation_ic      = "Int. Comm."
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$comp_condition <- NA
for (arm_col in names(comp_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$comp_condition[answered] <- comp_arms[[arm_col]]
}
rawdata_t2$comp_condition <- as.factor(rawdata_t2$comp_condition)
summary(rawdata_t2$comp_condition)
str(rawdata_t2$comp_condition)

# Outcome: first non-missing answer across the arm-specific items, as integer
comp_answers <- lapply(names(comp_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$compensation <- as.integer(do.call(coalesce, comp_answers))
summary(rawdata_t2$compensation)
str(rawdata_t2$compensation)

# Condition and outcome variables for punishments
# Lookup: arm-specific item -> label of the experimental arm (order matters:
# later items overwrite earlier labels, coalesce() keeps the first answer).
punish_arms <- c(
  punishment_control = "Control",
  punishment_Azeri   = "Azerbaijan",
  punishment_Armenia = "Armenia",
  punishment_both    = "Both"
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$punish_condition <- NA
for (arm_col in names(punish_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$punish_condition[answered] <- punish_arms[[arm_col]]
}
rawdata_t2$punish_condition <- as.factor(rawdata_t2$punish_condition)
summary(rawdata_t2$punish_condition)
str(rawdata_t2$punish_condition)

# Outcome: first non-missing answer across the arm-specific items, as integer
punish_answers <- lapply(names(punish_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$punishment <- as.integer(do.call(coalesce, punish_answers))
summary(rawdata_t2$punishment)

# Condition and outcome variables for route_1
# Lookup: item (cost level x arm) -> label of the experimental arm. The order
# is meaningful: later items overwrite earlier labels, and coalesce() keeps
# the first non-missing answer.
route1_arms <- c(
  route1_300_control = "Control",
  route1_450_control = "Control",
  route1_600_control = "Control",
  route1_300_Azeri   = "Azerbaijan",
  route1_450_Azeri   = "Azerbaijan",
  route1_600_Azeri   = "Azerbaijan",
  route1_300_Armenia = "Armenia",
  route1_450_Armenia = "Armenia",
  route1_600_Armenia = "Armenia",
  route1_300_both    = "Both",
  route1_450_both    = "Both",
  route1_600_both    = "Both",
  route1_300_ic      = "Int. Comm.",
  route1_450_ic      = "Int. Comm.",
  route1_600_ic      = "Int. Comm."
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$route_1_condition <- NA
for (arm_col in names(route1_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$route_1_condition[answered] <- route1_arms[[arm_col]]
}
rawdata_t2$route_1_condition <- as.factor(rawdata_t2$route_1_condition)
summary(rawdata_t2$route_1_condition)
str(rawdata_t2$route_1_condition)

# Outcome: first non-missing answer across all route1 items, as integer
route1_answers <- lapply(names(route1_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$route_1 <- as.integer(do.call(coalesce, route1_answers))
summary(rawdata_t2$route_1)
str(rawdata_t2$route_1)

# Condition and outcome variables for route_2
# Lookup: item (cost level x arm) -> label of the experimental arm. The order
# is meaningful: later items overwrite earlier labels, and coalesce() keeps
# the first non-missing answer.
route2_arms <- c(
  route2_300_control = "Control",
  route2_450_control = "Control",
  route2_600_control = "Control",
  route2_300_Azeri   = "Azerbaijan",
  route2_450_Azeri   = "Azerbaijan",
  route2_600_Azeri   = "Azerbaijan",
  route2_300_Armenia = "Armenia",
  route2_450_Armenia = "Armenia",
  route2_600_Armenia = "Armenia",
  route2_300_both    = "Both",
  route2_450_both    = "Both",
  route2_600_both    = "Both",
  route2_300_ic      = "Int. Comm.",
  route2_450_ic      = "Int. Comm.",
  route2_600_ic      = "Int. Comm."
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$route_2_condition <- NA
for (arm_col in names(route2_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$route_2_condition[answered] <- route2_arms[[arm_col]]
}
rawdata_t2$route_2_condition <- as.factor(rawdata_t2$route_2_condition)
summary(rawdata_t2$route_2_condition)
str(rawdata_t2$route_2_condition)

# Outcome: first non-missing answer across all route2 items, as integer
route2_answers <- lapply(names(route2_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$route_2 <- as.integer(do.call(coalesce, route2_answers))
summary(rawdata_t2$route_2)
str(rawdata_t2$route_2)

# Condition and outcome variables for trust
# Lookup: arm-specific item -> label of the experimental arm. This single
# list drives both the condition and the outcome so the two cannot drift
# apart: the outcome used to list trust_both_2 twice and never read
# trust_both_1, which left those respondents with condition "Both" but a
# missing outcome.
# The order is meaningful: later items overwrite earlier labels, and
# coalesce() keeps the first non-missing answer.
trust_arms <- c(
  trust_control   = "Control",
  trust_control_1 = "Control",
  trust_control_2 = "Control",
  trust_Azeri     = "Azerbaijan",
  trust_Azeri_1   = "Azerbaijan",
  trust_Azeri_2   = "Azerbaijan",
  trust_Armenia   = "Armenia",
  trust_Armenia_1 = "Armenia",
  trust_Armenia_2 = "Armenia",
  trust_both      = "Both",
  trust_both_1    = "Both",
  trust_both_2    = "Both",
  trust_ic        = "Int. Comm.",
  trust_ic_1      = "Int. Comm.",
  trust_ic_2      = "Int. Comm."
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$trust_condition <- NA
for (arm_col in names(trust_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$trust_condition[answered] <- trust_arms[[arm_col]]
}
rawdata_t2$trust_condition <- as.factor(rawdata_t2$trust_condition)
summary(rawdata_t2$trust_condition)
str(rawdata_t2$trust_condition)

# Outcome: first non-missing answer across all trust items, as integer
trust_answers <- lapply(names(trust_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$trust <- as.integer(do.call(coalesce, trust_answers))
summary(rawdata_t2$trust)
str(rawdata_t2$trust)

# Condition and outcome variables for mines
# Lookup: arm-specific item -> label of the experimental arm. This single
# list drives both the condition and the outcome so the two cannot drift
# apart: the outcome used to list mines_both_2 twice and never read
# mines_both_1, which left those respondents with condition "Both" but a
# missing outcome.
# The order is meaningful: later items overwrite earlier labels, and
# coalesce() keeps the first non-missing answer.
mines_arms <- c(
  mines_control   = "Control",
  mines_control_1 = "Control",
  mines_control_2 = "Control",
  mines_Azeri     = "Azerbaijan",
  mines_Azeri_1   = "Azerbaijan",
  mines_Azeri_2   = "Azerbaijan",
  mines_Armenia   = "Armenia",
  mines_Armenia_1 = "Armenia",
  mines_Armenia_2 = "Armenia",
  mines_both      = "Both",
  mines_both_1    = "Both",
  mines_both_2    = "Both",
  mines_ic        = "Int. Comm.",
  mines_ic_1      = "Int. Comm.",
  mines_ic_2      = "Int. Comm."
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$mines_condition <- NA
for (arm_col in names(mines_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$mines_condition[answered] <- mines_arms[[arm_col]]
}
rawdata_t2$mines_condition <- as.factor(rawdata_t2$mines_condition)
summary(rawdata_t2$mines_condition)
str(rawdata_t2$mines_condition)

# Outcome: first non-missing answer across all mines items, as integer
mines_answers <- lapply(names(mines_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$mines <- as.integer(do.call(coalesce, mines_answers))
summary(rawdata_t2$mines)
str(rawdata_t2$mines)

# Condition and outcome variables for aid
# Lookup: arm-specific item -> label of the experimental arm. This single
# list drives both the condition and the outcome so the two cannot drift
# apart: the outcome used to list aid_both_2 twice and never read
# aid_both_1, which left those respondents with condition "Both" but a
# missing outcome.
# The order is meaningful: later items overwrite earlier labels, and
# coalesce() keeps the first non-missing answer.
aid_arms <- c(
  aid_control   = "Control",
  aid_control_1 = "Control",
  aid_control_2 = "Control",
  aid_Azeri     = "Azerbaijan",
  aid_Azeri_1   = "Azerbaijan",
  aid_Azeri_2   = "Azerbaijan",
  aid_Armenia   = "Armenia",
  aid_Armenia_1 = "Armenia",
  aid_Armenia_2 = "Armenia",
  aid_both      = "Both",
  aid_both_1    = "Both",
  aid_both_2    = "Both",
  aid_ic        = "Int. Comm.",
  aid_ic_1      = "Int. Comm.",
  aid_ic_2      = "Int. Comm."
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$aid_condition <- NA
for (arm_col in names(aid_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$aid_condition[answered] <- aid_arms[[arm_col]]
}
rawdata_t2$aid_condition <- as.factor(rawdata_t2$aid_condition)
summary(rawdata_t2$aid_condition)
str(rawdata_t2$aid_condition)

# Outcome: first non-missing answer across all aid items, as integer
aid_answers <- lapply(names(aid_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$aid <- as.integer(do.call(coalesce, aid_answers))
summary(rawdata_t2$aid)
str(rawdata_t2$aid)

# Condition and outcome variables for pow
# Lookup: arm-specific item -> label of the experimental arm. This single
# list drives both the condition and the outcome so the two cannot drift
# apart: the outcome used to list pow_both_2 twice and never read
# pow_both_1, which left those respondents with condition "Both" but a
# missing outcome.
# The order is meaningful: later items overwrite earlier labels, and
# coalesce() keeps the first non-missing answer.
pow_arms <- c(
  pow_control   = "Control",
  pow_control_1 = "Control",
  pow_control_2 = "Control",
  pow_Azeri     = "Azerbaijan",
  pow_Azeri_1   = "Azerbaijan",
  pow_Azeri_2   = "Azerbaijan",
  pow_Armenia   = "Armenia",
  pow_Armenia_1 = "Armenia",
  pow_Armenia_2 = "Armenia",
  pow_both      = "Both",
  pow_both_1    = "Both",
  pow_both_2    = "Both"
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$pow_condition <- NA
for (arm_col in names(pow_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$pow_condition[answered] <- pow_arms[[arm_col]]
}
rawdata_t2$pow_condition <- as.factor(rawdata_t2$pow_condition)
summary(rawdata_t2$pow_condition)
str(rawdata_t2$pow_condition)

# Outcome: first non-missing answer across all pow items, as integer
pow_answers <- lapply(names(pow_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$pow <- as.integer(do.call(coalesce, pow_answers))
summary(rawdata_t2$pow)
str(rawdata_t2$pow)

# Condition and outcome variables for return
# Lookup: arm-specific item -> label of the experimental arm. This single
# list drives both the condition and the outcome so the two cannot drift
# apart: the outcome used to list return_both_2 twice and never read
# return_both_1, which left those respondents with condition "Both" but a
# missing outcome.
# The order is meaningful: later items overwrite earlier labels, and
# coalesce() keeps the first non-missing answer.
return_arms <- c(
  return_control   = "Control",
  return_control_1 = "Control",
  return_control_2 = "Control",
  return_Azeri     = "Azerbaijan",
  return_Azeri_1   = "Azerbaijan",
  return_Azeri_2   = "Azerbaijan",
  return_Armenia   = "Armenia",
  return_Armenia_1 = "Armenia",
  return_Armenia_2 = "Armenia",
  return_both      = "Both",
  return_both_1    = "Both",
  return_both_2    = "Both"
)

# Condition: label of the arm whose item holds a non-missing answer
rawdata_t2$return_condition <- NA
for (arm_col in names(return_arms)) {
  answered <- !is.na(rawdata_t2[[arm_col]])
  rawdata_t2$return_condition[answered] <- return_arms[[arm_col]]
}
rawdata_t2$return_condition <- as.factor(rawdata_t2$return_condition)
summary(rawdata_t2$return_condition)
str(rawdata_t2$return_condition)

# Outcome: first non-missing answer across all return items, as integer
return_answers <- lapply(names(return_arms), function(nm) rawdata_t2[[nm]])
rawdata_t2$return <- as.integer(do.call(coalesce, return_answers))
summary(rawdata_t2$return)
str(rawdata_t2$return)

# Condition and outcome variables for cost experiment_1
# Lookup: route1 item -> cost-level label. The same list feeds the condition
# and the outcome. Building the coalesce() call from the list also removes
# the trailing empty argument the call used to end with, which only works
# when the dots are collected leniently.
# The order is meaningful: later items overwrite earlier labels, and
# coalesce() keeps the first non-missing answer.
cost1_levels <- c(
  route1_300_Azeri   = "route1_300",
  route1_300_Armenia = "route1_300",
  route1_300_both    = "route1_300",
  route1_300_ic      = "route1_300",
  route1_300_control = "route1_300",
  route1_450_Azeri   = "route1_450",
  route1_450_Armenia = "route1_450",
  route1_450_both    = "route1_450",
  route1_450_ic      = "route1_450",
  route1_450_control = "route1_450",
  route1_600_Azeri   = "route1_600",
  route1_600_Armenia = "route1_600",
  route1_600_both    = "route1_600",
  route1_600_ic      = "route1_600",
  route1_600_control = "route1_600"
)

# Condition: cost level of the item that holds a non-missing answer
rawdata_t2$cost1_condition <- NA
for (item_col in names(cost1_levels)) {
  answered <- !is.na(rawdata_t2[[item_col]])
  rawdata_t2$cost1_condition[answered] <- cost1_levels[[item_col]]
}
rawdata_t2$cost1_condition <- as.factor(rawdata_t2$cost1_condition)
summary(rawdata_t2$cost1_condition)
str(rawdata_t2$cost1_condition)

# Outcome: first non-missing answer across all route1 items, as integer
cost1_answers <- lapply(names(cost1_levels), function(nm) rawdata_t2[[nm]])
rawdata_t2$cost1 <- as.integer(do.call(coalesce, cost1_answers))
summary(rawdata_t2$cost1)
str(rawdata_t2$cost1)

# Condition and outcome variables for cost experiment_2
# Lookup: route2 item -> cost-level label. The same list feeds the condition
# and the outcome. Building the coalesce() call from the list also removes
# the trailing empty argument the call used to end with, which only works
# when the dots are collected leniently.
# The order is meaningful: later items overwrite earlier labels, and
# coalesce() keeps the first non-missing answer.
cost2_levels <- c(
  route2_300_Azeri   = "route2_300",
  route2_300_Armenia = "route2_300",
  route2_300_both    = "route2_300",
  route2_300_ic      = "route2_300",
  route2_300_control = "route2_300",
  route2_450_Azeri   = "route2_450",
  route2_450_Armenia = "route2_450",
  route2_450_both    = "route2_450",
  route2_450_ic      = "route2_450",
  route2_450_control = "route2_450",
  route2_600_Azeri   = "route2_600",
  route2_600_Armenia = "route2_600",
  route2_600_both    = "route2_600",
  route2_600_ic      = "route2_600",
  route2_600_control = "route2_600"
)

# Condition: cost level of the item that holds a non-missing answer
rawdata_t2$cost2_condition <- NA
for (item_col in names(cost2_levels)) {
  answered <- !is.na(rawdata_t2[[item_col]])
  rawdata_t2$cost2_condition[answered] <- cost2_levels[[item_col]]
}
rawdata_t2$cost2_condition <- as.factor(rawdata_t2$cost2_condition)
summary(rawdata_t2$cost2_condition)
str(rawdata_t2$cost2_condition)

# Outcome: first non-missing answer across all route2 items, as integer
cost2_answers <- lapply(names(cost2_levels), function(nm) rawdata_t2[[nm]])
rawdata_t2$cost2 <- as.integer(do.call(coalesce, cost2_answers))
summary(rawdata_t2$cost2)
str(rawdata_t2$cost2)

## Delete respondents with missing value on outcomes --------------------------
# A row is incomplete unless both the compensation and the punishment
# condition are present.
rawdata_t2$incomplete <- !(
  !is.na(rawdata_t2$comp_condition) & !is.na(rawdata_t2$punish_condition)
)
summary(rawdata_t2$incomplete)
# Keep the complete rows only
rawdata_t2 <- subset(rawdata_t2, !incomplete)

# Keep necessary variables only ------------------------------------------------
# NOTE(review): the selection is positional. Presumably column 4 is the ID and
# columns 152:173 are variables created above - verify with names(rawdata_t2)
# whenever the raw export or any of the steps above change, because a shift in
# column order silently selects different variables.
rawdata_t2 <- rawdata_t2[c(4,152:173)]
summary(rawdata_t2) #20 ID's missing

# Save cleaned dataset ---------------------------------------------------------
saveRDS(rawdata_t2, "data/T2-clean-data.rds") #save as .rds
